Simulate a Number of Battles

The function tanks simulates reps battles with obs serial numbers recorded for each battle. The argument tanks is the number of tanks that the Germans had. The argument fixedest is the expert’s best guess.

tanks <- function (tanks = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- as.matrix(rep(tanks, reps))
    temp <- apply(temp, 1, sample, size = obs)
    temp <- t(apply(temp, 2, tanks.ests, fixedest))
    temp
}

Compute Estimates

The function tank.ests runs on a sample of observed serial numbers. Each of the vector values is the result of a student supplied estimator. These need to be changed to reflect the class’ ideas.

tanks.ests <- function (x = stop("Argument 'x' is missing"), fixedest = 125) 
{
    n <- length(x)
    ests <- rep(NA, 11)
    xbar <- mean(x)
    xmedian <- median(x)
    xn <- max(x)
    xvar <- var(x)
    xstddev <- sqrt(xvar)
    x1 <- min(x)
    rng <- c(-1,1)%*%range(x)
    xsum <- sum(x)
    xnm1 <- sort(x)[n-1]
    ests[1] <- xn
    ests[2] <- 2 * xbar
    ests[3] <- xn + x1
    ests[4] <- xn + rng/2
    ests[5] <- xbar + xstddev
    ests[6] <- 2 * xmedian
    ests[7] <- xn + xnm1
    ests[8] <- 2*xn - x1
    ests[9] <- xsum
    ests[10] <- xn * ((n+1)/n) - 1
    ests[11] <- fixedest
    names(ests) <- c("x[n]","2*xbar", "x[n]+x[1]", "x[n]+R/2", "xbar+s", "2*m", "x[n]+x[n-1]","2x[n]-x[1]","sum(x)", "UMVUE",fixedest)
    ests
}

Compute Descriptive Statistics

The function tanks.descriptives computes descriptive statistics for each of the estimators in tanks.ests.

tanks.descriptives <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    means <- apply(temp, 2, mean)
    stddevs <- sqrt(apply(temp, 2, var))
    medians <- apply(temp, 2, median)
    bias <- means - n
    mse <- bias^2 + stddevs^2
    t(cbind(means, stddevs, medians, bias, mse))
}

Plot Estimates

The individual estimates computed for the samples from each battle can be plotted. This allows us to compare location and dispersion statistics — center and spread. tanks.plots2 is intended to be an “improved” version of tanks.plots. Both plots use traditional lattice boxplots and there is a ggplot plot as well.

### Load the lattice package
p_load(lattice)

tanks.plots <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    bwplot(factor(temp[, 2], labels = tanknames) ~ 
        temp[, 1], xlab = "Number of Tanks", 
        ylab = "Estimator")
}

tanks.plots2 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    bwplot(factor(temp[, 2], labels = tanknames) ~ 
        temp[, 1], xlab = "Number of Tanks", 
        ylab = "Estimator", panel = function (x , 
            y , vref = n, ... ) 
        {
            panel.bwplot(x, y)
            panel.abline(v = vref, lty = 2)
        }, vref = n)
}

p_load(ggplot2)
p_load(tidyverse)
tanks.plots3 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    temp <- as_tibble(temp)
    names(temp) <- c("Estimate", "Estimator")
    temp$Estimator <- factor(temp$Estimator)
    ggplot(temp, aes(x=Estimator, y=Estimate)) +
      geom_boxplot(alpha=0.7) +
      stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
      theme(legend.position="none") +
      scale_fill_brewer(palette="Set1") +
      geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
      coord_flip()
}

tanks.plots4 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    temp <- melt(temp)
    names(temp) <- c("RowID","Estimator","Estimate")
    ggplot(temp, aes(x=Estimator, y=Estimate)) +
      geom_boxplot(alpha=0.7) +
      stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
      theme(legend.position="none") +
      scale_fill_brewer(palette="Set1") +
      geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
      coord_flip()
}

Compare our Estimators

The class’ estimators can be compared using the functions defined above.

  ### Compute descriptive stats
  tanks.descriptives(n = 117, obs = 5, reps = 10000, fixedest = 125)
##              x[n]    2*xbar x[n]+x[1]   x[n]+R/2    xbar+s        2*m
## means    98.35270 117.57420 117.76210  137.82435  91.61257  117.19700
## stddevs  16.21762  29.51669  24.79664   24.09286  17.34818   43.47452
## medians 103.00000 117.60000 118.00000  142.50000  93.51191  118.00000
## bias    -18.64730   0.57420   0.76210   20.82435 -25.38743    0.19700
## mse     610.73300 871.56466 615.45399 1014.11932 945.48094 1890.07300
##         x[n]+x[n-1] 2x[n]-x[1]      sum(x)     UMVUE 125
## means     176.88310  177.29600   293.93550 117.02324 125
## stddevs    33.24717   33.25781    73.79172  19.46114   0
## medians   182.00000  182.00000   294.00000 122.60000 125
## bias       59.88310   60.29600   176.93550   0.02324   8
## mse      4691.35984 4741.68961 36751.38962 378.73667  64
  ### Plot the estimates from each of the estimators
  tanks.plots(n = 117, obs = 5, reps = 10000, fixedest = 125)

  ### Plot the estimates from each of the estimators
  tanks.plots2(n = 117, obs = 5, reps = 10000, fixedest = 125)

  ### Plot the estimates from each of the estimators
  tanks.plots4(n = 117, obs = 5, reps = 10000, fixedest = 125)

Note that \[\widehat{N} = X_{(n)} \frac{n+1}{n} - 1\] is UMVUE for \(N\) when the \(X_i\) are i.i.d. DU(1,\(N\)).